# Association of 25 significant CpGs with metabolic risk factors
library(data.table)
# NOTE(review): clearing the workspace and hard-coding a machine-specific
# working directory make this script non-portable. Both are kept because the
# relative file names used below are resolved against this directory.
rm(list = ls())
setwd("C:/Users/sijia/Desktop/current working dictionary/methylation/code/20190619 cpg_mediation")

# Analysis data set; combined column-wise with the grouped CpG table below.
data <- fread("0826_survival_analysis.csv")

# Stata snippet kept for reference: it cuts each of the 25 CpGs into 4 groups.
# Presumably this is how the `_4g` columns of cpg_25_4g.csv were produced
# (TODO confirm).
# foreach var of varlist cg01545454 cg02386575 cg02591826 cg05681643 cg05740632 cg06358566 cg07219103 cg07560408 cg08106661 cg08484100 cg10400937 cg10643850 cg11630610 cg11754670 cg13311494 cg15833447 cg16639138 cg19583211 cg20302171 cg20562821 cg21210537 cg22794712 cg24792179 cg26334131 cg23398826{
# egen `var'_4g=cut(`var'), group(4)
# }
cpg_25_4g <- fread("cpg_25_4g.csv")

# Drop columns 2-26 and keep column 1 plus everything after column 26
# (presumably the ID followed by the grouped CpG columns -- TODO confirm).
cpg_25_4g <- cpg_25_4g[, -(2:26)]

# Both tables are sorted by `v1` and then bound by row position, so the key
# must exist in both. A missing key would make order() return integer(0) and
# silently empty the tables.
stopifnot(
  !is.null(cpg_25_4g$v1),
  !is.null(data$v1)
)
cpg_25_4g <- cpg_25_4g[order(cpg_25_4g$v1), ]
data <- data[order(data$v1), ]

# Enforce row alignment instead of only displaying the mismatch count: a
# mismatch here would pair CpG groups with the wrong subjects.
stopifnot(
  nrow(cpg_25_4g) == nrow(data),
  all(cpg_25_4g$v1 == data$v1)
)

covs <- data.frame(cpg_25_4g, data)
covs$education <- as.numeric(covs$education)
covs$region_code <- factor(covs$region_code)
covs$fasting_time <- factor(covs$fasting_time)
covs$no_batch <- factor(covs$no_batch)

# `y` and `cpg` are placeholders that fitmodel() fills in for each model.
Formula <- y ~ cpg + age + gender + education + marital_status + smk + drk +
  pa1 + pa2 + diet_score + fasting_time + region_code + no_batch

# Matrix form so that fitmodel() can pull one grouped CpG column by index.
cpg_25_4g <- as.matrix(cpg_25_4g)

# Fit the linear model `Formula` with `sbp_adj` as the outcome and the grouped
# CpG stored in column `i` of `cpg_25_4g` as a categorical exposure.
#
# Relies on three objects from the enclosing environment: `covs` (data frame
# holding `sbp_adj` and the covariates named in `Formula`), `cpg_25_4g`
# (matrix of grouped CpG values) and `Formula`. `covs` is modified only in the
# function's local copy.
#
# Args:
#   i: column index into `cpg_25_4g`.
#
# Returns:
#   A vector of length 9: three estimates, three standard errors and three
#   p-values for the 2nd-4th levels of the CpG factor (relative to its first
#   level under R's default treatment contrasts). A level that does not exist
#   or whose coefficient is missing from the fitted model gives NA in its
#   slot. If fitting raises a warning or an error, a line is printed and all
#   nine values are NA.
fitmodel <- function(i) {
  covs$y <- covs$sbp_adj
  covs$cpg <- as.factor(cpg_25_4g[, i])

  est <- rep(NA_real_, 3)
  se <- rep(NA_real_, 3)
  pvalue <- rep(NA_real_, 3)

  tryCatch({
    # Summarise once; the table is reused for all three statistics.
    coef_table <- coef(summary(lm(Formula, data = covs)))

    # Select the CpG rows by name rather than by position. summary.lm() omits
    # aliased coefficients and a CpG with fewer than four groups has fewer
    # dummy terms; positional rows 2:4 would then return covariate
    # coefficients instead of CpG effects.
    group_levels <- levels(covs$cpg)[2:4]
    rows <- match(paste0("cpg", group_levels), rownames(coef_table))
    rows[is.na(group_levels)] <- NA_integer_

    # An NA row index yields NA, which keeps every output slot in place.
    est <- coef_table[rows, 1]
    se <- coef_table[rows, 2]
    pvalue <- coef_table[rows, 4]
  }, warning = function(w) {
    print(paste("warning", i, sep = " "))
  }, error = function(e) {
    print(paste("error", i, sep = " "))
  })

  c(est, se, pvalue)
}
# Fit one model per grouped CpG column (columns 2-26 of `cpg_25_4g`). Each
# call returns 9 values, so the transposed matrix has one row per CpG.
# vapply() pins that shape instead of relying on sapply()'s simplification.
result <- as.data.frame(t(vapply(2:26, fitmodel, numeric(9))))
colNames <- c("quantile1", "quantile2", "quantile3")

# Column order matches fitmodel()'s output: estimates, standard errors,
# p-values.
names(result) <- c(
  paste0("e_", colNames),
  paste0("se_", colNames),
  paste0("p_", colNames)
)
result$probename <- colnames(cpg_25_4g)[2:26]

# Argument names are spelled out in full and TRUE/FALSE replace T/F, which
# can be reassigned.
write.table(result, "sbp_adj.csv", quote = FALSE, sep = ",",
            col.names = TRUE, row.names = FALSE)